{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pathlib\n",
    "import yaml\n",
    "import re\n",
    "import os\n",
    "import sys\n",
    "\n",
    "from json import load, dump\n",
    "from typing import Set\n",
    "from os.path import join, exists, basename\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the catalogue file; the next cell iterates it as a sequence of mappings\n",
    "catalog_path = './resources/supernova.yaml'\n",
    "with open(catalog_path, mode='r') as catalog_file:\n",
    "    data = yaml.safe_load(catalog_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lookup table: bibtexKey field -> name field of each catalogue item\n",
    "bibtex_key_to_name = {item['bibtexKey']: item['name'] for item in data}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "193"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open('./notebook-va.bib', 'r') as fp:\n",
    "    lines = fp.readlines()\n",
    "\n",
    "# Entries are separated by blank lines. Only flush a non-empty buffer so\n",
    "# that consecutive or trailing blank lines never yield an empty entry,\n",
    "# which would make the year parsing below fail on int('').\n",
    "bibtex_entries = []\n",
    "cur_entry = ''\n",
    "for line in lines:\n",
    "    if line == '\\n':\n",
    "        if cur_entry:\n",
    "            bibtex_entries.append(cur_entry)\n",
    "            cur_entry = ''\n",
    "    else:\n",
    "        cur_entry += line\n",
    "\n",
    "# Flush the last entry when the file does not end with a blank line\n",
    "if cur_entry:\n",
    "    bibtex_entries.append(cur_entry)\n",
    "\n",
    "# Create a dictionary that maps bibtex key to the entry\n",
    "bibtex_dict = {}\n",
    "\n",
    "for entry in bibtex_entries:\n",
    "    # The key is captured from the header line, e.g. '@article{smith2020a,'\n",
    "    first_line = entry.split('\\n')[0]\n",
    "    key = re.sub(r'^@.+{(.+),.*$', r'\\1', first_line)\n",
    "\n",
    "    # Also parse the year: four digits at the end of the key, optionally\n",
    "    # followed by one lowercase letter. If the key does not match, re.sub\n",
    "    # leaves it unchanged and int() will usually raise ValueError.\n",
    "    year = int(re.sub(r'^.*(\\d{4})[a-z]?$', r'\\1', key))\n",
    "\n",
    "    bibtex_dict[key] = {\n",
    "        'bibtex': entry,\n",
    "        'year': year\n",
    "    }\n",
    "\n",
    "len(bibtex_entries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "# doi_map:  name -> DOI, or 'arXiv:<id>' when only an arXiv url is present\n",
    "# year_map: name -> year parsed from the bibtex key\n",
    "doi_map = {}\n",
    "year_map = {}\n",
    "\n",
    "for entry in bibtex_entries:\n",
    "    first_line = entry.split('\\n')[0]\n",
    "    key = re.sub(r'^@.+{(.+),.*$', r'\\1', first_line)\n",
    "    year = int(re.sub(r'^.*(\\d{4})[a-z]?$', r'\\1', key))\n",
    "\n",
    "    # Skip bibtex entries whose key is not in bibtex_key_to_name\n",
    "    if key not in bibtex_key_to_name:\n",
    "        continue\n",
    "\n",
    "    name = bibtex_key_to_name[key]\n",
    "    year_map[name] = year\n",
    "\n",
    "    # Prefer an explicit doi field\n",
    "    match = re.search(r'.*doi = {(.*?)}.*', entry)\n",
    "    if match:\n",
    "        doi_map[name] = match.group(1)\n",
    "        continue\n",
    "\n",
    "    # Otherwise fall back to an arXiv identifier taken from the url field.\n",
    "    # The local is deliberately not called `url`: that global holds the\n",
    "    # Semantic Scholar endpoint, and re-running this cell must not clobber it.\n",
    "    match = re.search(r'.*url = {(.*?)}.*', entry)\n",
    "    if match:\n",
    "        entry_url = match.group(1)\n",
    "        if 'arxiv' in entry_url:\n",
    "            doi_map[name] = 'arXiv:' + entry_url.split('/')[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "81"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of names for which a DOI or arXiv identifier was found\n",
    "len(doi_map)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Citation Count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base URL for paper lookups; the query loop appends a paper identifier to it\n",
    "url = 'https://api.semanticscholar.org/graph/v1/paper/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query citation count from semantic scholar\n",
    "# citation_map: name -> citation count for every successful lookup\n",
    "# failed_dois:  identifiers whose lookup failed or returned no count\n",
    "citation_map = {}\n",
    "failed_dois = []\n",
    "\n",
    "# Only the citation count is requested; the dict is the same for every call\n",
    "parameter = {\n",
    "    'fields': 'citationCount'\n",
    "}\n",
    "\n",
    "for name, doi in doi_map.items():\n",
    "    cur_url = url + doi\n",
    "\n",
    "    try:\n",
    "        # The timeout keeps one stalled connection from hanging the whole loop\n",
    "        response = requests.get(cur_url, parameter, timeout=30).json()\n",
    "    except (requests.RequestException, ValueError):\n",
    "        # Network failure or non-JSON body: keep the results gathered so far\n",
    "        # and record the identifier for manual follow-up\n",
    "        failed_dois.append(doi)\n",
    "        continue\n",
    "\n",
    "    if isinstance(response, dict) and 'citationCount' in response:\n",
    "        citation_map[name] = response['citationCount']\n",
    "    else:\n",
    "        # The response carries no count (e.g. an error payload)\n",
    "        failed_dois.append(doi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "networkit 10.1109/IPDPSW55747.2022.00055\n",
      "mols2grid 10.5281/zenodo.6591473\n",
      "ivpy 10.11588/DAH.2019.4.66401\n",
      "cytoscapejs 10.5281/ZENODO.6828253\n",
      "geopandas 10.5281/ZENODO.7422493\n",
      "dea-tools 10.26186/145234\n",
      "shap 10.48550/arXiv.1705.07874\n",
      "itkwidgets 10.5281/ZENODO.7489693\n",
      "data+shift 10.2312/EVS.20221097\n",
      "tissuumaps 10.1101/2022.01.28.478131\n"
     ]
    }
   ],
   "source": [
    "# Report each unresolved identifier next to the first name in doi_map that\n",
    "# maps to it (empty string when none does), for manual follow-up.\n",
    "for doi in failed_dois:\n",
    "    cur_name = next((name for name in doi_map if doi_map[name] == doi), '')\n",
    "    print(cur_name, doi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manually add citation for failed papers\n",
    "citation_map['networkit'] = 1\n",
    "citation_map['mols2grid'] = 0\n",
    "citation_map['ivpy'] = 6\n",
    "citation_map['cytoscapejs'] = 0\n",
    "citation_map['geopandas'] = 0\n",
    "citation_map['dea-tools'] = 0\n",
    "citation_map['shap'] = 11652\n",
    "citation_map['itkwidgets'] = 0\n",
    "citation_map['data+shift'] = 3\n",
    "citation_map['tissuumaps'] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the citation count and the year of every name into one record\n",
    "citation_info = {\n",
    "    name: {'citation': count, 'year': year_map[name]}\n",
    "    for name, count in citation_map.items()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write through a context manager so the file is flushed and closed\n",
    "with open('./resources/citation_info.json', 'w') as fp:\n",
    "    dump(citation_info, fp)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nlp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
